/************************************************************************* 
Program name: clean_tradeline04.sas;
Objective: This program 
   * constructs the mop variable (a cleaned version of currmop)
   * sets negative date values to missing
   * creates the pay3059, pay6080, and pay90up variables, which indicate how many months an individual was 
     in these delinquency categories
   * creates indicator variables from the dispute codes that indicate whether a derogatory is associated with
     an account, a bankruptcy, a judgment, etc
**************************************************************************/

OPTIONS LINESIZE=79;

libname local '/data';
libname out '/data';

%include "/href/secure/student_loans/Programs/01_Transunion/tu_formats.sas";

* To be consistent with the earlier years, we put the tradeline files back into one file;
* Bob split them for space reasons but our data set is smaller;
data combine;
  set local.trader04 local.tradem04;
run;

* The 2003 and 2004 programs have KOB on them.  I delete KOB to be consistent with the other years.;
data out.cleantrade04;
   set combine (rename=(oldmop=currmop maxdelvl=c3 nummnths=c4 past3059=c5 past6089=c6 past90up=c7 oldbal=acctbal) drop=kob);

   length kob $1.;
   length maxdelvl mopmax mop 3.;
   length nummnths past3059 past6089 past90up paymnths
   	  paymop1 pay3059 pay6089 pay90119 pay120up paymax 3.;
   length maxdelam dtopen l 8.;
   length maj dis non clo col for rep crg jug oth ban pai mop 3.;

   /* Consolidates all the "8" (repossession) and "9" (bad debt / collection) values*/
   /* Recodes "UR" (unrated) and "UC" (unclassified) with numeric values*/
   if currmop in ('00','01','02','03','04','05','06','07','08','09')
      then mop=input(currmop,best12.);
   else if substr(currmop,1,1) eq '8' then mop=8;
   else if substr(currmop,1,1) eq '9' then mop=9;
   else if currmop eq 'UR' then mop=10;
   else if currmop eq 'UC' then mop=11;

   kob=substr(subcdt,1,1);

   /* the 2004 file does not include the teditseq variable so we use person instead*/
   keep acctype amtpast assoccd collfld crdtlim acctbal currmop
   dispcd dtclose dtopen dtpdout dtrept dtveri ecoa hicredit
   loantypt maxdelam maxdeldt maxdelvl nummnths past3059 past6089
   past90up paypat subcdt terms mop dup editseq 
   kob mopmax: paymnths: paymax: subname paymop1: pay3059: pay6089: pay90119: pay120up:
   maj dis non clo col for rep crg jug oth ban pai /*teditseq*/ person;

   /* This code dates from the days when mainframe storage was expensive and programmers needed to be
  parsimonious with their variable lengths.  Our dataset is smaller than Bob's so we don't really need
  this code, although I keep it.*/
  maxdelvl=c3;
  nummnths=c4;
  past3059=c5;
  past6089=c6;
  past90up=c7;

  /* Sets negative date values to missing*/
  /* These five data recodes were originally written as (for example) IF DTOPEN LE 0 LE 0 THEN DTOPEN=.  Getting rid
  of the second LE 0 does not affect the number of dates set to missing so I have removed it. KMP 6/07*/

  if dtopen le 0 then dtopen=.;
  if dtclose le 0 then dtclose=.;
  if dtpdout le 0 then dtpdout=.;
  if dtrept  le 0 then dtrept=.;
  if dtveri  le 0 then dtveri=.;

  if maxdelam le 0 then maxdelam=.;
  if maxdeldt le 0 then maxdeldt=.;
  if maxdelvl le 0 then maxdelvl=.;

  /*fix funny value of terms*/
  if terms eq "ZZZZZZZZ" then terms = .;

  /* Creates new variables that indicate how many times a payment was 30 to 59 days late, 60 to 89 days late, etc
  over the past 48 months.  These variables are based on the "paypat" variable, which contains 48 months
  of payment history.  These variables are only created for consumers with non-missing paypats.*/

  if substr(paypat,1,1) ne ' ' then do;
     pay3059=0;
     pay6089=0;
     pay90119=0;
     pay120up=0;
     paymnths=0;
     paymop1=0;
     mopmax=0;
     paymax=0;

     do k=1 to 48;
     	if substr(paypat,k,1) eq ' ' then go to cc;
   	paymnths=paymnths+1;
   	if substr(paypat,k,1) eq 'X' then go to aa;
   	paymax=k;
   	l=input(substr(paypat,k,1), best12.);
   	if l gt mopmax then mopmax=l;
   	if l eq 1 then paymop1=paymop1+1;
   	if l eq 2 then pay3059=pay3059+1;
   	if l eq 3 then pay6089=pay6089+1;
     	if l eq 4 then pay90119=pay90119+1;
	if l eq 5 then pay120up = pay120up+1;
	aa: end;
	cc: end;

     if mopmax le 0 then do;
     	pay3059=.;
	pay6089=.;
     	pay90119=.;
     	pay120up=.;
     	paymnths=.;
     	mopmax=.;
     	paymop1=.;
     	paymax=.;
     end;


	/*create a variable called paypat_24 that is substring of paypat.*/
/*  Paypat_24 is a string of payment history over the last 24 months (i.e. since the last survey). */
/*Because the most recent date of paypat depends on the date of the MOP variable, the length of paypat_24 depends on the date of the MOP variable.*/

	format mopdate yymmn6.;
	if mop ge 2 then do;
	   if dtclose gt 0 then mopdate = input(put(dtclose,6.),yymmn6.);
	   if dtclose le 0 and dtpdout gt 0 then mopdate = input(put(dtpdout,6.),yymmn6.);
	   if dtclose le 0 and dtpdout le 0 then mopdate = input(put(dtveri,6.),yymmn6.);
	end;
	if mop lt 2 then do;
	   if dtpdout gt 0 then mopdate = input(put(dtpdout,6.),yymmn6.);
	   if dtpdout le 0 and dtclose gt 0 then mopdate = input(put(dtclose,6.),yymmn6.);
	   if dtpdout le 0 and dtclose le 0 then mopdate = input(put(dtveri,6.),yymmn6.);
	end;
	lenpaypat = intck('month','01jun2003'd,mopdate)-1;
	if lenpaypat ge 1 and substr(paypat,1,1) ne ' ' then paypat_18 = substr(paypat,1,lenpaypat);

  	if substr(paypat_18,1,1) ne ' ' then do;
     	   pay3059_18=0;
     	   pay6089_18=0;
     	   pay90119_18=0;
     	   pay120up_18=0;
     	   paymnths_18=0;
     	   paymop1_18=0;
	   mopmax_18=0;
     	   paymax_18=0;

     	do k=1 to 18;
     	   if substr(paypat_18,k,1) eq ' ' then go to dd;
     	   paymnths_18=paymnths_18+1;
     	   if substr(paypat_18,k,1) eq 'X' then go to ff;
     	   paymax_18=k;
     	   l=input(substr(paypat_18,k,1), best12.);
     	   if l gt mopmax_18 then mopmax_18=l;			
     	   if l eq 1 then paymop1_18=paymop1_18+1;
     	   if l eq 2 then pay3059_18=pay3059_18+1;
     	   if l eq 3 then pay6089_18=pay6089_18+1;
     	   if l eq 4 then pay90119_18=pay90119_18+1;
	   if l eq 5 then pay120up_18 = pay120up_18+1;
     	   dd: end;
     	   ff: end;

    	   if mopmax_18 le 0 then do;	
     	      pay3059_18=.;
	      pay6089_18=.;
	      pay90119_18=.;
	      pay120up_18=.;
	      paymnths_18=.;
	      mopmax_18=.;
	      paymop1_18=.;
	      paymax_18=.;
           end;



	/************************DO DISPUTE RECODES*****************************/
	if dispcd ne '   ' then do;
	   array thr maj dis non clo col for rep crg jug oth ban pai;
	   do over thr;
   	      thr=0;
	   end;

	   /* Major derogatory associated with the account*/
	   IF DISPCD IN ('AM ','AS ','BKL','CCA','RLD','CLA','PRL','JUD','MOV',
  	   'RLP','RPO','RRE','RVD','RVN','RVP','RVR','WEP','CTS','FCL','SET',
  	   'PPL','STU','DLU','CCG','DLF','FFB','FPD','FRD','GOF','GMD','RDP',
  	   'LBR','PCL','ETD','ETA','SRG','SLD','FPI','BDC','BDI','BDM','CAG',
  	   'CCC','COL','CPP','CSF','GVC','RPF','RVC','RVE','PBD','POA','FPS',
  	   'CHK','AOA','PTD','VPD','FHA','CPM','REA','RAC','PBB','MCC','PPA',
  	   'SBB','ER ','PPD','RPD','SLP','PWG','PDD','CLS','SK ','PLP','ERX',
  	   'CTX','CBL','CBR','PNR','PFC','TTR','CBT','BKW','JUG','FTS','FTB',
  	   'ETS','ETB','ETI','IRE','IRO','IRB','CCD','CDD','CED','BKD','BCD',
  	   'BKC','BRR','BRC','CDL','CDC','CRD','CRL','CDR','CRC','CRR','CRV',
  	   'CDT','CTC','CRT','CTV','WPD','WPC','WCD','WRC','WRR') THEN MAJ=1;

	   /* Account information is in dispute*/
	   IF DISPCD IN ('AA ','AB ','AD ','AE ','AF ','AG ','AH ','AI ','AJ ',
  	   'AK ','AL ','AN ','AP ','AR ','AT ','RS ','DP ','DRP','CD ','DIS',
  	   'DRS','LBR','CNR','DRC','DRX','AJX','DGX','AID','CAD','CBD','CFD',
  	   'BKD','BCD','BRR','BRC','CDL','CDC','CRD','CRL','CDR','CRC','CRR',
  	   'CRV','CTD','CTC','CRT','CTV','WPD','WCD','WRC','WRR','DRG','DGR')
  	   THEN DIS=1;

	   /* Nonderogatory?*/
	   IF DISPCD IN ('AA ','AB ','AD ','AE ','AF ','AG ','AH ','AI ','AJ ',
  	   'AK ','AL ','AN ','AP ','AT ','DP ','DRP','CD ','DIS','AJP','STL',
  	   'TRF','RFN','CLO','INS','ASM','CBC','CBG','DRS','ELN','FHI','GEA',
  	   'GFS','GGG','GOP','LEA','PRD','SDL','SGL','NIR','TRL','UDL','VAL',
  	   'VAR','DEC','SLA','SPD','PPC','VCC','VPI','HEQ','INC','ICP','AOA',
  	   'CNR','CSA','OFS','ET ','AMD','AC ','BTA','IA ','DRG','IB ','NIB',
  	   'SPL','SIL','PAL','CTR','AAP','CLC','DRC','INP','AFR','DM ','CLB',
  	   'ACR','ACT','DRX','DDR','AJX','NIX','CWC','AND','NPA','INA','PDE',
  	   'CPB','PDI','AID','FTO','ETO','PLL','CAD','CBD','CFD','UGL')
  	   THEN NON=1;

	   /* Account closed*/
	   IF DISPCD IN ('STL','TRF','RFN','CLO','ASM','CBC','CBG','TRL','DEC',
  	   'ETD','ETA','CCC','OFS','ET ','AC ','BTA','PAL','CTR','AAP','CLC',
  	   'ACR','ACT','FTO','ETO','IRO','PLL','CAD','CBD','CCD','CDD','CED',
  	   'CFD','BCD','BKC','BRC','CDC','CRD','CRC','CRR','CTC','CRT','WPC',
  	   'WCD','WRC') THEN CLO=1;

	   /* Bankruptcy filing associated with the account*/
	   IF DISPCD IN ('BKL','WEP','FFB','REA','RAC','PBB','SBB','CBL','CBR',
  	   'CBT','BKW','CCD','CDD','CED','BKD','BCD','BKC','BRR','BRC','CDL',
  	   'CDC','CRD','CRL','CDR','CRC','CRR','CRV','CDT','CTC','CRT','CTV',
  	   'WPD','WPC','WCD','WRC','WRR') THEN BAN=1;

	   * Collection associated with the account*/
	   IF DISPCD IN ('CLA','RDP','PCL','CCC','COL','CPP','PBD','AOA','CPM',
  	   'CWC') THEN COL=1;

	   * Account balance paid to zero*/
	   IF DISPCD IN ('RLD','PPL','INS','RDP','PCL','PBD','CPM','ER ','PPD',
  	   'INP','RPD','PDD','ERX','PFC') THEN PAI=1;

	   * Foreclosure*/
	   IF DISPCD IN ('FCL','DLU','DLF','FPD','FRD','FPI','FPS','FHA')
  	   THEN FOR=1;

	   * Repossession*/
	   IF DISPCD IN ('AM ','RLD','RLP','RPO','RRE','RVD','RVN','RVP','RVR',
  	   'GVC','RPF','RVC','RVE','RPD','PDD','PFC','IRE','IRO','IRB')
  	   THEN REP=1;

	   * Chargeoff associated with the account*/
	   IF DISPCD IN ('PRL','SET','PPL','STU','ETD','ETA','SRG','SLD','BDC',
  	   'BDI','BDM','CAG','ET ','SLP','SK ','PLP','TTR','FTB','ETB','ETI')
  	   THEN CRG=1;

	   * Judgment associated with the account*/
	   IF DISPCD IN ('AS ','JUD','JUG') THEN JUG=1;

	   * Other*/
	   IF DISPCD IN ('DEC','DM ','CLB','NPA') THEN OTH=1;

	END;

	IF DTCLOSE LE 0 AND CLO EQ 1 THEN DTCLOSE=.;

run;

proc freq data = out.cleantrade04;
  tables mop kob pay3059 pay6089 pay90119 pay120up mopmax;
  format mop mop. kob $kob.;
run;

proc freq data = out.cleantrade04;
  tables MAJ DIS NON CLO COL FOR REP CRG JUG OTH BAN PAI;
run;

proc print data= out.cleantrade04 (obs=10);
run;


